% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Replication "Deconstructing the Yield Curve"
% Crump and Gospodinov (2024)
% Date: 26-JUL-2024
% Code for simulations w/ external predictors
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
clear all; clc; close all;
rng(12345);
addpath('functions');
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Compile C code
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
mex 'functions/buildVAR.c'
mex 'functions/buildForwards.c'
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Settings
%   3-factor model: factorFwdMats = [1,5,10];
%   5-factor model: factorFwdMats = [1,3,5,7,10];
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
o.dataSet               = 'gsw';
o.freq                  = 'm';
o.startDate             = 197201;
o.endDate               = 202212;
o.predNames             = [];
p.N                     = 120;
p.yldMats                 = (12:12:120);
%p.factorFwdMats           = [1,3,5,7,10];
p.factorFwdMats           = [1,5,10];
p.BH_B_BC               = 0;
p.CG_B_BC               = 0;
%
p.ExtPred_BC            = 1000;
sp.S                    = 5000;
sp.B                    = 399;
sp.T                    = 600;
sp.M                    = 'ROT';

p.H                     = p.yldMats(1);
p.K                     = numel(p.factorFwdMats);
p.L                     = 2;

% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load bond data
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
disp(' '); disp('Loading Data...'); disp(' ');
%
dataAll = loadBondData(p.N);
dataTag = ['dataAll.', o.dataSet, '.', o.freq];
for i = {'prc', 'yld', 'ret', 'xret', 'fwd', 'dret'}, eval([i{1}, 'Raw = ', dataTag, '.', i{1}, ';']);  end
data.mats = (1:p.N);
datesBonds = eval([dataTag, '.dates']);
data.period = eval([dataTag, '.period']);
datesCommon = datesBonds;
%
if ~isempty(o.startDate), datesCommon = datesCommon(datesCommon >= o.startDate); end
if ~isempty(o.endDate), datesCommon = datesCommon(datesCommon <= o.endDate); end
%

tmp1 = readtable('../input/CPILFESL.xls', 'DataRange', 'A12:B806');
predRaw1 = 100*((tmp1.Var2(4:end)./tmp1.Var2(1:end-3)).^4-1);
datesPred1 = 100*year(tmp1.Var1)+month(tmp1.Var1);
datesPred1 = datesPred1(13:end);

tmp2 = readtable('../input/INDPRO.xls', 'DataRange', 'A12:B1265');
predRaw2 = 100*((tmp2.Var2(4:end)./tmp2.Var2(1:end-3)).^4-1);
datesPred2 = 100*year(tmp2.Var1)+month(tmp2.Var1);
datesPred2 = datesPred2(13:end);

dates = datesCommon;
actStartDate = max([dates(1),datesPred1(1),datesPred2(1)]);
actEndDate = min([dates(end),datesPred1(end),datesPred2(end)]);

idxBonds = (datesBonds >= actStartDate) & (datesBonds <= actEndDate);
idxPred1 = (datesPred1 >= actStartDate) & (datesPred1 <= actEndDate);
idxPred2 = (datesPred2 >= actStartDate) & (datesPred2 <= actEndDate);

%
data.prc = prcRaw(idxBonds,1:p.N);
data.yld = yldRaw(idxBonds,1:p.N);
data.fwd = fwdRaw(idxBonds,1:p.N);
data.dret = dretRaw(idxBonds,1:p.N);
data.ret = retRaw(idxBonds,1:p.N);
data.xret = xretRaw(idxBonds,1:p.N);
data.T = size(data.prc,1);
data.pred = [predRaw1(idxPred1) predRaw2(idxPred2)];
% 
data2.yld = data.yld(:,p.yldMats);
data2.fwd = [data2.yld(:,1) data2.yld(:,2:end).*(ones(data.T,1)*p.yldMats(2:end))/data.period-data2.yld(:,1:end-1).*(ones(data.T,1)*p.yldMats(1:end-1))/data.period];
data2.dret = zeros(size(data2.yld));
data2.dret(1:p.H,:) = NaN;
data2.dret(p.H+1:end,2:end) = data2.fwd(1:end-p.H,2:end) - data2.fwd(p.H+1:end,1:end-1);
data2.xret = cumsum(data2.dret,2);
%
if strcmp(sp.T,'SAME'), sp.T=data.T; end
if strcmp(sp.M,'ROT'), sp.M=ceil((sp.T*p.N)^(2/5)); end
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% LLSW data-driven truncation parameters
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
sp.nu_LLSW_EWC = floor(0.4*sp.T^(2/3));
sp.S_LLSW_NW = ceil(1.3*sqrt(sp.T));

% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Estimate parameters of data generating process
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
X = data2.fwd(:,p.factorFwdMats);
X1 = [ones(data.T,1) X];
PsiHat = inv(X1(1:end-1,:)'*X1(1:end-1,:))*X1(1:end-1,:)'*X(2:end,:);
PhiHat = PsiHat(2:end,:)';
muHat =  PsiHat(1,:)';
VHat = X(2:end,:) - X1(1:end-1,:)*PsiHat;
SigHat = VHat'*VHat/(data.T - numel(PsiHat));
loadHat = inv(X1'*X1)*X1'*data2.fwd;
eHat = data2.fwd - X1*loadHat;
tmp = eHat'*eHat/data.T;
[tmpS,tmpV,tmpD] = svd(tmp);
SigEHatSqrt = tmpS*((tmpV).^(1/2))*tmpD';
data2.SigEHatSqrt = SigEHatSqrt;
data2.SigHat = SigHat;
data2.PsiHat = PsiHat;
data2.PhiHat = PhiHat;
data2.muHat = muHat;
data2.X = X;
data2.X1 = X1;
data2.loadHat = loadHat;
%
extPred = var1BC(data.pred,p.ExtPred_BC);
data2.pred.PsiHat = extPred.PsiBC;
data2.pred.PhiHat = extPred.PhiBC;
data2.pred.muHat = extPred.muBC;
data2.pred.SigHat = extPred.SigBC;

% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Estimate in-sample regressions
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
betaHatAllData = NaN(numel(p.yldMats),size(X1,2)+size(data.pred,2));
for i = 1:numel(p.yldMats)
    betaHatAllData(i,:) = regress(data2.xret(p.H+1:end,i),[X1(1:end-p.H,:), data.pred(1:end-p.H,:)]);
end

betaHatAll = NaN(sp.S,numel(p.yldMats),size(X1,2)+size(data.pred,2));
betaSEHatAll = NaN(sp.S,numel(p.yldMats),size(X1,2)+size(data.pred,2));
bsBetaHatAll_CG = NaN(sp.S,sp.B,numel(p.yldMats),size(X1,2)+size(data.pred,2));
bsBetaHatAll_BH = NaN(sp.S,sp.B,numel(p.yldMats),size(X1,2)+size(data.pred,2));
bsBetaHatAll_FullOracle = NaN(sp.S,sp.B,numel(p.yldMats),size(X1,2)+size(data.pred,2));
bsBetaSEHatAll_CG = NaN(sp.S,sp.B,numel(p.yldMats),size(X1,2)+size(data.pred,2));
bsBetaSEHatAll_BH = NaN(sp.S,sp.B,numel(p.yldMats),size(X1,2)+size(data.pred,2));
bsBetaSEHatAll_FullOracle = NaN(sp.S,sp.B,numel(p.yldMats),size(X1,2)+size(data.pred,2));
bsBetaSEHatAll_LLSW_NW = NaN(sp.S,numel(p.yldMats),size(X1,2)+size(data.pred,2));
bsBetaSEHatAll_LLSW_EWC = NaN(sp.S,numel(p.yldMats),size(X1,2)+size(data.pred,2));
simEigvals = NaN(sp.S,numel(p.yldMats));

parpool;
fprintf('\nProgress:\n');
fprintf(['\n' repmat('.',1,sp.S) '\n\n']);

tic
parfor s=1:sp.S
    simOut = simsExtPredFcn(s,data2,data,p,sp);
    betaHatAll(s,:,:) = simOut.betaHatAll;
    betaSEHatAll(s,:,:) = simOut.betaSEHatAll;
    bsBetaHatAll_CG(s,:,:,:) = simOut.bsBetaHatAll_CG;
    bsBetaHatAll_BH(s,:,:,:) = simOut.bsBetaHatAll_BH;
    bsBetaHatAll_FullOracle(s,:,:,:) = simOut.bsBetaHatAll_FullOracle;
    bsBetaSEHatAll_CG(s,:,:,:) = simOut.bsBetaSEHatAll_CG;
    bsBetaSEHatAll_BH(s,:,:,:) = simOut.bsBetaSEHatAll_BH;
    bsBetaSEHatAll_FullOracle(s,:,:,:) = simOut.bsBetaSEHatAll_FullOracle;
    bsBetaSEHatAll_LLSW_NW(s,:,:) = simOut.betaSEHatAll_LLSW_NW;
    bsBetaSEHatAll_LLSW_EWC(s,:,:) = simOut.betaSEHatAll_LLSW_EWC;
    simEigvals(s,:) = simOut.simEigvals;
    fprintf('\b|\n');
end
disp(' ');
toc
disp(' ');
delete(gcp('nocreate'));

% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% True parameters
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Et = eye(numel(p.yldMats));
Et(1,:) = [];
Eth = eye(numel(p.yldMats));
Eth(end,:) = [];

tmp = 0;
for i = 1:p.H, tmp = tmp + PhiHat^(i-1)*muHat; end

beta0 = zeros(size(squeeze(mean(betaHatAll(:,:,1:end)))));
beta0(2:end,2:size(X1,2)) = cumsum(Et*loadHat(2:end,:)' - Eth*loadHat(2:end,:)'*PhiHat^p.H);
beta0(2:end,1) = cumsum((Et-Eth)*loadHat(1,:)' - Eth*loadHat(2:end,:)'*tmp);

pval_Size_CG = NaN(sp.S,numel(p.yldMats),size(X1,2));
pval_Size_BH = NaN(sp.S,numel(p.yldMats),size(X1,2));
pval_Size_FullOracle = NaN(sp.S,numel(p.yldMats),size(X1,2));
pval_Power_CG = NaN(sp.S,numel(p.yldMats),size(X1,2));
pval_Power_BH = NaN(sp.S,numel(p.yldMats),size(X1,2));
pval_Power_FullOracle = NaN(sp.S,numel(p.yldMats),size(X1,2));
tstat_Size_NW = NaN(sp.S,numel(p.yldMats),size(X1,2));
tstat_Power_NW = NaN(sp.S,numel(p.yldMats),size(X1,2));
tstat_Size_LLSW_EWC = NaN(sp.S,numel(p.yldMats),size(X1,2));
tstat_Power_LLSW_EWC = NaN(sp.S,numel(p.yldMats),size(X1,2));
tstat_Size_LLSW_NW = NaN(sp.S,numel(p.yldMats),size(X1,2));
tstat_Power_LLSW_NW = NaN(sp.S,numel(p.yldMats),size(X1,2));

for s = 1:sp.S
    for i = 2:numel(p.yldMats)
        for j = 1:size(X1,2)            
            tstat = betaHatAll(s,i,j)./betaSEHatAll(s,i,j);
            tstat0  = (betaHatAll(s,i,j)-beta0(i,j))./betaSEHatAll(s,i,j);
            tstat_Size_NW(s,i,j) = tstat0;
            tstat_Power_NW(s,i,j) = tstat;
            %
            tmp = (bsBetaHatAll_CG(s,:,i,j) - betaHatAll(s,i,j))./bsBetaSEHatAll_CG(s,:,i,j);
            pval_Size_CG(s,i,j) = mean(abs(tstat0) < abs(tmp));
            pval_Power_CG(s,i,j) = mean(abs(tstat) < abs(tmp));
            tmp = (bsBetaHatAll_BH(s,:,i,j) - betaHatAll(s,i,j))./bsBetaSEHatAll_BH(s,:,i,j);
            pval_Size_BH(s,i,j) = mean(abs(tstat0) < abs(tmp));
            pval_Power_BH(s,i,j) = mean(abs(tstat) < abs(tmp));
            tmp = (bsBetaHatAll_FullOracle(s,:,i,j) - beta0(i,j))./bsBetaSEHatAll_FullOracle(s,:,i,j);
            pval_Size_FullOracle(s,i,j) = mean(abs(tstat0) < abs(tmp));
            pval_Power_FullOracle(s,i,j) = mean(abs(tstat) < abs(tmp));
            %
            tstat_Power_LLSW_EWC(s,i,j) = betaHatAll(s,i,j)./bsBetaSEHatAll_LLSW_EWC(s,i,j);
            tstat_Size_LLSW_EWC(s,i,j) = (betaHatAll(s,i,j)-beta0(i,j))./bsBetaSEHatAll_LLSW_EWC(s,i,j);
            %
            tstat_Power_LLSW_NW(s,i,j) = betaHatAll(s,i,j)./bsBetaSEHatAll_LLSW_NW(s,i,j);
            tstat_Size_LLSW_NW(s,i,j) = (betaHatAll(s,i,j)-beta0(i,j))./bsBetaSEHatAll_LLSW_NW(s,i,j);            
        end
        for j = size(X1,2)+1:size(X1,2)+p.L
            tstat = betaHatAll(s,i,j)./betaSEHatAll(s,i,j);
            tstat0  = (betaHatAll(s,i,j)-beta0(i,j))./betaSEHatAll(s,i,j);
            tstat_Size_NW(s,i,j) = tstat0;
            %
            tmp = (bsBetaHatAll_CG(s,:,i,j) - betaHatAll(s,i,j))./bsBetaSEHatAll_CG(s,:,i,j);
            pval_Size_CG(s,i,j) = mean(abs(tstat0) < abs(tmp));
            tmp = (bsBetaHatAll_BH(s,:,i,j) - 0)./bsBetaSEHatAll_BH(s,:,i,j);
            pval_Size_BH(s,i,j) = mean(abs(tstat0) < abs(tmp));
            tmp = (bsBetaHatAll_FullOracle(s,:,i,j) - 0)./bsBetaSEHatAll_FullOracle(s,:,i,j);
            pval_Size_FullOracle(s,i,j) = mean(abs(tstat0) < abs(tmp));
            %
            tstat_Size_LLSW_EWC(s,i,j) = (betaHatAll(s,i,j)-beta0(i,j))./bsBetaSEHatAll_LLSW_EWC(s,i,j);
            %
            tstat_Size_LLSW_NW(s,i,j) = (betaHatAll(s,i,j)-beta0(i,j))./bsBetaSEHatAll_LLSW_NW(s,i,j);            
        end
    end
end


% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% CG and BH
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Nominal Size 10%
squeeze(mean(pval_Size_CG < 0.1))
squeeze(mean(pval_Size_BH < 0.1))
squeeze(mean(pval_Size_FullOracle < 0.1))
squeeze(mean(pval_Power_CG < 0.1))
squeeze(mean(pval_Power_BH < 0.1))
squeeze(mean(pval_Power_FullOracle < 0.1))
% Nominal Size 5%
squeeze(mean(pval_Size_CG < 0.05));
squeeze(mean(pval_Size_BH < 0.05));
squeeze(mean(pval_Power_CG < 0.05));
squeeze(mean(pval_Power_BH < 0.05));

% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Standard Newey-West
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
squeeze(mean(abs(tstat_Size_NW) > norminv(.95),1))
squeeze(mean(abs(tstat_Size_NW) > norminv(.975),1))

% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% LLSW: EWC
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Nominal Size 10%
squeeze(mean(abs(tstat_Size_LLSW_EWC) > tinv(.95,sp.nu_LLSW_EWC),1))
% Nominal Size 5%
squeeze(mean(abs(tstat_Size_LLSW_EWC) > tinv(.975,sp.nu_LLSW_EWC),1))

% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% LLSW: NW
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 0.95 and 0.975
cv_LLSW_NW_10 = load('../input/cv_nw_10.mat');
cv_LLSW_NW_5 = load('../input/cv_nw_05.mat');
[~, idx] = min(abs(sp.S_LLSW_NW/sp.T-cv_LLSW_NW_10.cv_nw_10(:,1,1)));
% Nominal Size 10%
squeeze(mean(abs(tstat_Size_LLSW_NW) > sqrt(cv_LLSW_NW_10.cv_nw_10(idx,2,1)),1))
% Nominal Size 5%
squeeze(mean(abs(tstat_Size_LLSW_NW) > sqrt(mean(cv_LLSW_NW_5.cv_nw_05(idx,2,1))),1))
% Power Size 10%
squeeze(mean(abs(tstat_Power_LLSW_NW) > sqrt(cv_LLSW_NW_10.cv_nw_10(idx,2,1)),1))

% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Eigenvalue share
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
mean(simEigvals(:,1)./sum(simEigvals,2))
mean(simEigvals(:,2)./sum(simEigvals,2))
mean(simEigvals(:,3)./sum(simEigvals,2))

tmpNumFac = numel(p.factorFwdMats);
if p.BH_B_BC>0, tmpBC = 'simsVAR1ExtPredBC_'; else tmpBC = 'simsVAR1ExtPredNoBC_'; end
save(['../output/', tmpBC, num2str(numel(p.factorFwdMats)), 'Fac_S', num2str(sp.S), '_T', num2str(sp.T)], 'pval*', 'tstat*', 'beta0', 'sp', 'simEigvals');

